*diff in diff analysis by voucher redesign
*last modified: 1 April 2025
*last modified by: Charlotte Ambrozek
*this do file implements the csdid2 package to estimate individual 2x2 doubly robust did estimates and aggregate to obtain an att 

*-------------------------------------------------------------------------------
*--- Preamble
*-------------------------------------------------------------------------------

clear all
set more off
set rmsg on

set maxvar 120000
set emptycells drop

*ssc install csdid, replace
*ssc install drdid, replace
*net install csdid2, from("https://friosavila.github.io/stpackages")

*-------------------------------------------------------------------------------
*--- Directories and Log 
*-------------------------------------------------------------------------------

local data_dir ./data/cleaned
local raw_dir ./data/raw
local out_dir ./analysis/output
local graph_dir ./analysis/output/graphs
local tab_dir ./analysis/output/tables
local log_dir ./documentation/logs
local date: display %tdYY-NN-DD date(c(current_date), "DMY")
di "`date'"
capture log close

log using `log_dir'/voucher_csdid_tip`date', replace

*-------------------------------------------------------------------------------
*--- Data and Frames 
*-------------------------------------------------------------------------------

*import cleaned eWIC implementation info into own frame
*eWIC implementation variable is at county fips/fiscal year level 
*partition data into state/treatment year groups

*import cleaned ewic implementation info into own frame
mkf ewic_fy
frame ewic_fy{
	use ./data/cleaned/ewic_rollout.dta, clear
	rename year fiscalyear
	drop pre* post* 
}

mkf income
frame income{
	use ./data/cleaned/countyyearmedianincome.dta, clear
	keep if year == 2005
	drop year
	destring ct_fips, replace
}

mkf countychars
frame countychars{
	use ./data/cleaned/countystats.dta, clear
	keep if year == 2005
	drop year
	destring ct_fips, replace
}

mkf demos
frame demos{
	use ./data/cleaned/demos.dta, clear
	keep if year == 2005
	drop year
	destring ct_fips, replace
}

cwf ewic_fy
frame put ct_fips ev_year state, into(timings)

cwf timings
duplicates drop
encode state, generate(state_code)

frlink m:1 ct_fips, frame(countychars) generate(countylink)
frget medincome sharepoor shareltwic sharelt2fpl sharesnap sharecash sharecashorsnap, from(countylink)
frlink m:1 ct_fips, frame(demos) generate(demoslink)
frget total_pop under5_pop hispanic_total_pop total_black_pop region division urbanrural share_black share_hispanic share_under5, from(demoslink)
gen log_pop = log(total_pop)

*import TIP store data into own frame
mkf tip_fy
frame tip_fy{
	local data_dir ./data/cleaned
	use `data_dir'/tip_auth_sq, clear
	*drop "Direct Distribution Center" and "Home Food Delivery Contractor" types
	gen f = (vendor_type1 == 3 & (vendor_type2 == 4 | missing(vendor_type2))) | (vendor_type1 == 4 & (vendor_type2 == 3 | missing(vendor_type2)))
	bys tip_id: egen flag = mode(f)
	bys tip_id: replace vendor_type1 = vendor_type1[_n+1] if flag == 0 & f == 1
	drop if flag == 1
	drop flag f tip_year_id
	*fill in gaps in state fips using modes
	generate str_fips = string(ct_fips)
	replace str_fips = "0" + str_fips if strlen(str_fips) == 4
	generate st_fips = substr(str_fips, 1, 2)
	destring st_fips, replace 
	
	*drop Vermont and mississippi because of different regiemes prior to eWIC
	*drop NV because ewic turn on then off then on again and we don't have reliable timings
	*drop missing st_fips
	drop if st_fips == 50 | st_fips == 32 | st_fips == 28 | missing(st_fips)

	*flag stores that specialize in WIC
	gen a50 = vendor_type1 == 1 | vendor_type1 == 7 | vendor_type2 == 1 | vendor_type2 == 7
	
	*link in low income/high poverty info
	frlink m:1 ct_fips, frame(income) generate(inc_link)
	frget lowinc highpov highwicelig, from(inc_link)

	*link in controls from timings frame
	frlink m:1 ct_fips, frame(timings) generate(timings_link)
	frget total_pop under5_pop hispanic_total_pop total_black_pop region division urbanrural share_black share_hispanic share_under5 medincome sharepoor shareltwic sharelt2fpl sharesnap sharecash sharecashorsnap, from(timings_link)

	*link ewic implementation info
	frlink m:1 ct_fips fiscalyear, frame(ewic_fy) generate(ewic_link)
	frget ev_year , from(ewic_link)
	* assume treated at earliest exposure
	bys tip_id: egen ey_min = min(ev_year)
	
	* assume treated at earliest exposure
	replace ev_year = ey_min if ev_year != ey_min & !missing(ey_min)
	bys tip_id (ev_year): assert ev_year[1] == ev_year[_N]

	*assert no dups at future level of xtset 
	duplicates report tip_id fiscalyear 
	assert r(N) == r(unique_value)
	xtset tip_id fiscalyear
	compress
}

cwf tip_fy

*-------------------------------------------------------------------------------
*--- Estimates: Store-level
*-------------------------------------------------------------------------------

cd `out_dir'

xtset tip_id fiscalyear
xtdes

mkf csdidsimple_auth float(b se pval ll ul) str25(outcome subsample)
mkf csdidevent_auth float(b se pval ll ul) int(ev_yr) str25(outcome subsample)
mkf csdideventavg_auth float(avgb avgse avgpval avgll avgul) int(times) str25(time outcome subsample)
mkf csdidgroup_auth float(b se pval ll ul) str25(gr_yr outcome subsample)

*csdid auth, ivar(tip_id) time(fiscalyear) gvar(ev_year) notyet long2 saverif(csdid_voucher_auth) replace cluster(st_fips)

 mkf post
 cwf post
 use csdid_voucher_auth, clear
 keep if ev_year <= 2009
 csdid_stats simple
 frame post csdidsimple_auth (r(table)[1,1]) (r(table)[2,1]) (r(table)[4,1]) (r(table)[5,1]) (r(table)[6,1]) ("auth") ("before redesign")
 csdid_stats event, window(-3 4)
*the structure of the returned matrix here is columns pre_avg post_avg tm3 ... tm1 tp0 tp1 ... tp4
*i assume that tm# is pre period # and tp# is post period #
* so we want to capture columns 3 through 11 for the event periods, plus the pre/post averages (in a separate frame that can be linked on later).
frame post csdideventavg_auth (r(table)[1,1]) (r(table)[2,1]) (r(table)[4,1]) (r(table)[5,1]) (r(table)[6,1]) (4) ("pre") ("auth") ("before redesign")
frame post csdideventavg_auth (r(table)[1,2]) (r(table)[2,2]) (r(table)[4,2]) (r(table)[5,2]) (r(table)[6,2]) (5) ("post") ("auth") ("before redesign")
local i = 2
forval y = -3/4{
	local i = `i' + 1
	frame post csdidevent_auth (r(table)[1,`i']) (r(table)[2,`i']) (r(table)[4,`i']) (r(table)[5,`i']) (r(table)[6,`i']) (`y') ("auth") ("before redesign")
}
use csdid_tip_auth, clear
csdid_stats group
local names: colnames r(table)
local names = subinstr("`names'", "G", "", .)
forval y = 1/10{
	local year: word `y' of `names'
	frame post csdidgroup_auth (r(table)[1,`y']) (r(table)[2,`y']) (r(table)[4,`y']) (r(table)[5,`y']) (r(table)[6,`y']) ("`year'") ("auth") ("before redesign")
}
 use csdid_voucher_auth, clear
 keep if ev_year > 2009
 csdid_stats simple
 frame post csdidsimple_auth (r(table)[1,1]) (r(table)[2,1]) (r(table)[4,1]) (r(table)[5,1]) (r(table)[6,1]) ("auth") ("after redesign")
 csdid_stats event, window(-3 4)
 *the structure of the returned matrix here is columns pre_avg post_avg tm3 ... tm1 tp0 tp1 ... tp4
 *i assume that tm# is pre period # and tp# is post period #
 * so we want to capture columns 3 through 11 for the event periods, plus the pre/post averages (in a separate frame that can be linked on later).
 frame post csdideventavg_auth (r(table)[1,1]) (r(table)[2,1]) (r(table)[4,1]) (r(table)[5,1]) (r(table)[6,1]) (4) ("pre") ("auth") ("after redesign")
 frame post csdideventavg_auth (r(table)[1,2]) (r(table)[2,2]) (r(table)[4,2]) (r(table)[5,2]) (r(table)[6,2]) (5) ("post") ("auth") ("after redesign")
 local i = 2
 forval y = -3/4{
 	local i = `i' + 1
 	frame post csdidevent_auth (r(table)[1,`i']) (r(table)[2,`i']) (r(table)[4,`i']) (r(table)[5,`i']) (r(table)[6,`i']) (`y') ("auth") ("after redesign")
 }
 use csdid_tip_auth, clear
 csdid_stats group
local names: colnames r(table)
local names = subinstr("`names'", "G", "", .)
forval y = 1/10{
	local year: word `y' of `names'
	frame post csdidgroup_auth (r(table)[1,`y']) (r(table)[2,`y']) (r(table)[4,`y']) (r(table)[5,`y']) (r(table)[6,`y']) ("`year'") ("auth") ("after redesign")
}

/* *-------------------------------------------------------------------------------
*--- Estimates: County-level
*-------------------------------------------------------------------------------

cwf tip_fy

gen n=1
des

*Collapse at conty-year level: # stores, entries, exits, eWIC implementation 

#delimit ;
collapse (sum)n (sum)entry (sum)exit (min)ev_year (mean)st_fips,    
         by(ct_fips fiscalyear);
#delimit cr

rename n n_stores

gen voucher = (fiscalyear>=2009) 

xtset ct_fips fiscalyear
xtdes 

cd `out_dir'

*Outcomes : n_stores is almost constant - no enough variation for identification
*           no long2 - conformability error
local outcomes entry exit

foreach out of local outcomes {

 mkf csdidsimple_`out' float(b se pval ll ul) str25(outcome subsample)
 mkf csdidevent_`out' float(b se pval ll ul) int(ev_yr) str25(outcome subsample)
 mkf csdideventavg_`out' float(avgb avgse avgpval avgll avgul) int(times) str25(time outcome subsample)
 mkf csdidgroup_`out' float(b se pval ll ul) str25(gr_yr outcome subsample)
 
 foreach v of numlist 0 1 {
  
  if `v'==0 local lab "before redesign"
  if `v'==1 local lab "after redesign"
  
  csdid `out' if voucher==`v', ivar(ct_fips) time(fiscalyear) gvar(ev_year) notyet /*long2*/ saverif(csdid_voucher_`out') replace cluster(st_fips)

  estat simple
  frame post csdidsimple_`out' (r(table)[1,1]) (r(table)[2,1]) (r(table)[4,1]) (r(table)[5,1]) (r(table)[6,1]) ("`out'") ("`lab'")
  estat event, window(-4 4)
  *the structure of the returned matrix here is columns pre_avg post_avg tm4 tm3 ... tm1 tp0 tp1 ... tp4
  *i assume that tm# is pre period # and tp# is post period #
  * so we want to capture columns 3 through 11 for the event periods, plus the pre/post averages (in a separate frame that can be linked on later).
  frame post csdideventavg_`out' (r(table)[1,1]) (r(table)[2,1]) (r(table)[4,1]) (r(table)[5,1]) (r(table)[6,1]) (4) ("pre") ("`out'") ("`lab'")
  frame post csdideventavg_`out' (r(table)[1,2]) (r(table)[2,2]) (r(table)[4,2]) (r(table)[5,2]) (r(table)[6,2]) (5) ("post") ("`out'") ("`lab'")
  local i = 2
  forval y = -4/4{
	local i = `i' + 1
	frame post csdidevent_`out' (r(table)[1,`i']) (r(table)[2,`i']) (r(table)[4,`i']) (r(table)[5,`i']) (r(table)[6,`i']) (`y') ("`out'") ("`lab'")
  }

  estat group
  local names: colnames r(table)
  local names = subinstr("`names'", "G", "", .)
  forval y = 1/10{
	local year: word `y' of `names'
	frame post csdidgroup_`out' (r(table)[1,`y']) (r(table)[2,`y']) (r(table)[4,`y']) (r(table)[5,`y']) (r(table)[6,`y']) ("`year'") ("`out'") ("`lab'")
  } 
 }
} */

*-------------------------------------------------------------------------------
*--- Export Results
*-------------------------------------------------------------------------------
frame csdidgroup_auth{
	list gr_yr in 1/20
	drop if gr_yr == "Average"
	destring gr_yr, generate(tmt_yr)
	drop if subsample == "before redesign" & tmt_yr > 2009
	drop if subsample == "after redesign" & tmt_yr <= 2009
	frlink m:1 outcome subsample, frame(csdidsimple_auth)
	frget b se pval ll ul, from(csdidsimple_auth) prefix("avg")
}

frame csdideventavg_auth{
	expand times, generate(new)
	bys time outcome subsample (new): gen ev_yr = sum(new)
	replace ev_yr = ev_yr - 4 if time == "pre"
	drop time new times
}

cwf csdidevent_auth
frlink 1:1 ev_yr outcome subsample, frame(csdideventavg_auth)
frget avgb avgse avgpval avgll avgul, from(csdideventavg_auth)
drop csdideventavg_auth

frame drop csdideventavg_auth

cwf csdidsimple_auth
save `out_dir'/voucher_csdid_auth_simple.dta, replace
cwf csdidevent_auth
save `out_dir'/voucher_csdid_auth_event.dta, replace  
cwf csdidgroup_auth
save `out_dir'/voucher_csdid_auth_group.dta, replace  

/*  
foreach out of local outcomes {
  frame csdideventavg_`out'{
	expand times, generate(new)
	bys time outcome subsample (new): gen ev_yr = sum(new)
	replace ev_yr = ev_yr - 4 if time == "pre"
	drop time new times
  }

  cwf csdidevent_`out'
  frlink 1:1 ev_yr outcome subsample, frame(csdideventavg_`out')
  frget avgb avgse avgpval avgll avgul, from(csdideventavg_`out')
  drop csdideventavg_`out'

  frame drop csdideventavg_`out'

  cwf csdidsimple_`out'
  save `out_dir'/voucher_csdid_`out'_simple.dta, replace
  cwf csdidevent_`out'
  save `out_dir'/voucher_csdid_`out'_event.dta, replace  
  cwf csdidgroup_`out'
  save `out_dir'/voucher_csdid_`out'_group.dta, replace 
 } */


log close
